import json
import csv
import pandas as pd
def log_message(message):
  """Print ``message`` framed by twenty '=' characters on each side.

  Args:
    message: Any object; it is converted with ``str()`` before printing.
  """
  bar = "=" * 20
  print(f"{bar}{message!s}{bar}")

# Load data from a JSON file
def get_data_from_json(path='../data/json/', filename='links.json'):
  """Read a UTF-8 encoded JSON file and return the parsed content.

  Args:
    path: Directory prefix; it is concatenated directly with ``filename``,
      so it must already end with a path separator.
    filename: Name of the JSON file.

  Returns:
    The deserialized JSON document (whatever ``json`` produces for it).
  """
  with open(path + filename, mode='r', encoding='utf-8') as json_file:
    parsed = json.loads(json_file.read())
  return parsed

# Load data from a CSV file
def get_data_from_csv(path, file_name):
  """Read a CSV file and return its complete rows as a list of dicts.

  Args:
    path: Directory prefix; it is concatenated directly with ``file_name``,
      so it must already end with a path separator.
    file_name: Name of the CSV file.

  Returns:
    A list with one ``{column_name: value}`` dict per remaining row, in file
    order. Rows containing at least one missing value are omitted.
  """
  file_path = path + file_name
  df = pd.read_csv(file_path)
  # dropna() with its defaults drops every row that has ANY NaN value.
  # NOTE(review): the original comment said only rows where all columns are
  # NaN should be filtered (that would be how='all'); behaviour is kept
  # as-is here -- confirm which one is intended.
  df = df.dropna()
  columns = list(df.columns)
  # Iterate plain tuples (name=None) and pair them with the real column
  # labels. Named tuples rename columns that are not valid Python identifiers
  # (spaces, keywords, leading underscore), which made attribute lookup by
  # column name fail, and a column called "Index" collided with the index.
  return [
    dict(zip(columns, row))
    for row in df.itertuples(index=False, name=None)
  ]


# Export data to a CSV file
def store_data_to_csv(data, filename, path='../data/llm-pretrain-data/'):
  """Write a sequence of dict rows to a UTF-8 CSV file.

  The header is taken from the keys of the first row. The target file is
  overwritten. When ``data`` is empty there is no header to derive, so an
  empty file is written instead of raising ``IndexError``.

  Args:
    data: Sequence of dicts, one per row. Every row must use only the keys
      of the first row (``csv.DictWriter`` raises ``ValueError`` otherwise).
    filename: Name of the output file.
    path: Directory prefix; it is concatenated directly with ``filename``,
      so it must already end with a path separator.
  """
  file_path = path + filename
  # Resolve the header before opening the file, so a malformed first row
  # fails without truncating an existing file.
  fieldnames = list(data[0].keys()) if data else None
  with open(file_path, mode='w', newline='', encoding='utf-8') as csv_file:
    if fieldnames is None:
      # Nothing to write: leave the freshly truncated, empty file behind.
      return
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(data)
